{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import anndata as ad\n",
    "import scanpy as sc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "pbmc = sc.read(\"pbmc3k.h5ad\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AnnData object with n_obs × n_vars = 2638 × 1838\n",
       "    obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden'\n",
       "    var: 'gene_ids', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'\n",
       "    uns: 'hvg', 'leiden', 'leiden_colors', 'neighbors', 'pca', 'rank_genes_groups', 'umap'\n",
       "    obsm: 'X_pca', 'X_umap'\n",
       "    varm: 'PCs'\n",
       "    obsp: 'connectivities', 'distances'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pbmc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.17146961, -0.28301534, -0.02940388, ..., -0.10964561,\n",
       "        -0.20923848, -0.5312033 ],\n",
       "       [-0.21458235, -0.3787022 , -0.04915038, ..., -0.31006348,\n",
       "        -0.31351015, -0.5966543 ],\n",
       "       [-0.3768877 , -0.2979373 , -0.104679  , ..., -0.16528179,\n",
       "        -0.17088924,  1.3789997 ],\n",
       "       ...,\n",
       "       [-0.20708963, -0.25140953, -0.04099827, ..., -0.04956494,\n",
       "        -0.16109322,  2.041497  ],\n",
       "       [-0.1903285 , -0.2262672 , -0.03383566, ...,  0.00376277,\n",
       "        -0.13518843, -0.48211104],\n",
       "       [-0.33378938, -0.25469664, -0.08717481, ..., -0.07181546,\n",
       "        -0.13025972, -0.47133783]], dtype=float32)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pbmc.X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2638, 1838)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pbmc.X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>n_genes</th>\n",
       "      <th>n_genes_by_counts</th>\n",
       "      <th>total_counts</th>\n",
       "      <th>total_counts_mt</th>\n",
       "      <th>pct_counts_mt</th>\n",
       "      <th>leiden</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAACATACAACCAC-1</th>\n",
       "      <td>781</td>\n",
       "      <td>779</td>\n",
       "      <td>2419.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>3.017776</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAACATTGAGCTAC-1</th>\n",
       "      <td>1352</td>\n",
       "      <td>1352</td>\n",
       "      <td>4903.0</td>\n",
       "      <td>186.0</td>\n",
       "      <td>3.793596</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAACATTGATCAGC-1</th>\n",
       "      <td>1131</td>\n",
       "      <td>1129</td>\n",
       "      <td>3147.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>0.889736</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAACCGTGCTTCCG-1</th>\n",
       "      <td>960</td>\n",
       "      <td>960</td>\n",
       "      <td>2639.0</td>\n",
       "      <td>46.0</td>\n",
       "      <td>1.743085</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAACCGTGTATGCG-1</th>\n",
       "      <td>522</td>\n",
       "      <td>521</td>\n",
       "      <td>980.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>1.224490</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTCGAACTCTCAT-1</th>\n",
       "      <td>1155</td>\n",
       "      <td>1153</td>\n",
       "      <td>3459.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>2.110436</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTCTACTGAGGCA-1</th>\n",
       "      <td>1227</td>\n",
       "      <td>1224</td>\n",
       "      <td>3443.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.929422</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTCTACTTCCTCG-1</th>\n",
       "      <td>622</td>\n",
       "      <td>622</td>\n",
       "      <td>1684.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>2.197150</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTGCATGAGAGGC-1</th>\n",
       "      <td>454</td>\n",
       "      <td>452</td>\n",
       "      <td>1022.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>2.054795</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTGCATGCCTCAC-1</th>\n",
       "      <td>724</td>\n",
       "      <td>723</td>\n",
       "      <td>1984.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>0.806452</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2638 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  n_genes  n_genes_by_counts  total_counts  total_counts_mt  \\\n",
       "index                                                                         \n",
       "AAACATACAACCAC-1      781                779        2419.0             73.0   \n",
       "AAACATTGAGCTAC-1     1352               1352        4903.0            186.0   \n",
       "AAACATTGATCAGC-1     1131               1129        3147.0             28.0   \n",
       "AAACCGTGCTTCCG-1      960                960        2639.0             46.0   \n",
       "AAACCGTGTATGCG-1      522                521         980.0             12.0   \n",
       "...                   ...                ...           ...              ...   \n",
       "TTTCGAACTCTCAT-1     1155               1153        3459.0             73.0   \n",
       "TTTCTACTGAGGCA-1     1227               1224        3443.0             32.0   \n",
       "TTTCTACTTCCTCG-1      622                622        1684.0             37.0   \n",
       "TTTGCATGAGAGGC-1      454                452        1022.0             21.0   \n",
       "TTTGCATGCCTCAC-1      724                723        1984.0             16.0   \n",
       "\n",
       "                  pct_counts_mt leiden  \n",
       "index                                   \n",
       "AAACATACAACCAC-1       3.017776      3  \n",
       "AAACATTGAGCTAC-1       3.793596      2  \n",
       "AAACATTGATCAGC-1       0.889736      0  \n",
       "AAACCGTGCTTCCG-1       1.743085      1  \n",
       "AAACCGTGTATGCG-1       1.224490      4  \n",
       "...                         ...    ...  \n",
       "TTTCGAACTCTCAT-1       2.110436      1  \n",
       "TTTCTACTGAGGCA-1       0.929422      2  \n",
       "TTTCTACTTCCTCG-1       2.197150      2  \n",
       "TTTGCATGAGAGGC-1       2.054795      2  \n",
       "TTTGCATGCCTCAC-1       0.806452      0  \n",
       "\n",
       "[2638 rows x 6 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pbmc.obs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>gene_ids</th>\n",
       "      <th>n_cells</th>\n",
       "      <th>mt</th>\n",
       "      <th>n_cells_by_counts</th>\n",
       "      <th>mean_counts</th>\n",
       "      <th>pct_dropout_by_counts</th>\n",
       "      <th>total_counts</th>\n",
       "      <th>highly_variable</th>\n",
       "      <th>means</th>\n",
       "      <th>dispersions</th>\n",
       "      <th>dispersions_norm</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>TNFRSF4</th>\n",
       "      <td>ENSG00000186827</td>\n",
       "      <td>155</td>\n",
       "      <td>False</td>\n",
       "      <td>155</td>\n",
       "      <td>0.077407</td>\n",
       "      <td>94.259259</td>\n",
       "      <td>209.0</td>\n",
       "      <td>True</td>\n",
       "      <td>0.277410</td>\n",
       "      <td>2.086050</td>\n",
       "      <td>0.665406</td>\n",
       "      <td>-9.596541e-10</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CPSF3L</th>\n",
       "      <td>ENSG00000127054</td>\n",
       "      <td>202</td>\n",
       "      <td>False</td>\n",
       "      <td>202</td>\n",
       "      <td>0.094815</td>\n",
       "      <td>92.518519</td>\n",
       "      <td>256.0</td>\n",
       "      <td>True</td>\n",
       "      <td>0.385194</td>\n",
       "      <td>4.506987</td>\n",
       "      <td>2.955005</td>\n",
       "      <td>-6.213524e-10</td>\n",
       "      <td>0.999605</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ATAD3C</th>\n",
       "      <td>ENSG00000215915</td>\n",
       "      <td>9</td>\n",
       "      <td>False</td>\n",
       "      <td>9</td>\n",
       "      <td>0.009259</td>\n",
       "      <td>99.666667</td>\n",
       "      <td>25.0</td>\n",
       "      <td>True</td>\n",
       "      <td>0.038252</td>\n",
       "      <td>3.953486</td>\n",
       "      <td>4.352607</td>\n",
       "      <td>-9.898943e-12</td>\n",
       "      <td>0.553626</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C1orf86</th>\n",
       "      <td>ENSG00000162585</td>\n",
       "      <td>501</td>\n",
       "      <td>False</td>\n",
       "      <td>501</td>\n",
       "      <td>0.227778</td>\n",
       "      <td>81.444444</td>\n",
       "      <td>615.0</td>\n",
       "      <td>True</td>\n",
       "      <td>0.678283</td>\n",
       "      <td>2.713522</td>\n",
       "      <td>0.543183</td>\n",
       "      <td>-5.648659e-11</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RER1</th>\n",
       "      <td>ENSG00000157916</td>\n",
       "      <td>608</td>\n",
       "      <td>False</td>\n",
       "      <td>608</td>\n",
       "      <td>0.298148</td>\n",
       "      <td>77.481481</td>\n",
       "      <td>805.0</td>\n",
       "      <td>True</td>\n",
       "      <td>0.814813</td>\n",
       "      <td>3.447533</td>\n",
       "      <td>1.582528</td>\n",
       "      <td>-3.389195e-11</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ICOSLG</th>\n",
       "      <td>ENSG00000160223</td>\n",
       "      <td>34</td>\n",
       "      <td>False</td>\n",
       "      <td>34</td>\n",
       "      <td>0.016667</td>\n",
       "      <td>98.740741</td>\n",
       "      <td>45.0</td>\n",
       "      <td>True</td>\n",
       "      <td>0.082016</td>\n",
       "      <td>2.585818</td>\n",
       "      <td>1.652185</td>\n",
       "      <td>1.165421e-09</td>\n",
       "      <td>0.985887</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SUMO3</th>\n",
       "      <td>ENSG00000184900</td>\n",
       "      <td>570</td>\n",
       "      <td>False</td>\n",
       "      <td>570</td>\n",
       "      <td>0.292963</td>\n",
       "      <td>78.888889</td>\n",
       "      <td>791.0</td>\n",
       "      <td>True</td>\n",
       "      <td>0.804815</td>\n",
       "      <td>4.046776</td>\n",
       "      <td>2.431045</td>\n",
       "      <td>-1.157975e-09</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SLC19A1</th>\n",
       "      <td>ENSG00000173638</td>\n",
       "      <td>31</td>\n",
       "      <td>False</td>\n",
       "      <td>31</td>\n",
       "      <td>0.018519</td>\n",
       "      <td>98.851852</td>\n",
       "      <td>50.0</td>\n",
       "      <td>True</td>\n",
       "      <td>0.058960</td>\n",
       "      <td>3.234231</td>\n",
       "      <td>2.932458</td>\n",
       "      <td>-6.231287e-10</td>\n",
       "      <td>0.946179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>S100B</th>\n",
       "      <td>ENSG00000160307</td>\n",
       "      <td>94</td>\n",
       "      <td>False</td>\n",
       "      <td>94</td>\n",
       "      <td>0.076667</td>\n",
       "      <td>96.518519</td>\n",
       "      <td>207.0</td>\n",
       "      <td>True</td>\n",
       "      <td>0.286282</td>\n",
       "      <td>3.042992</td>\n",
       "      <td>1.078783</td>\n",
       "      <td>-7.653932e-10</td>\n",
       "      <td>0.999970</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PRMT2</th>\n",
       "      <td>ENSG00000160310</td>\n",
       "      <td>588</td>\n",
       "      <td>False</td>\n",
       "      <td>588</td>\n",
       "      <td>0.275926</td>\n",
       "      <td>78.222222</td>\n",
       "      <td>745.0</td>\n",
       "      <td>True</td>\n",
       "      <td>0.816647</td>\n",
       "      <td>2.774169</td>\n",
       "      <td>0.629058</td>\n",
       "      <td>-4.405954e-10</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1838 rows × 13 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                gene_ids  n_cells     mt  n_cells_by_counts  mean_counts  \\\n",
       "index                                                                      \n",
       "TNFRSF4  ENSG00000186827      155  False                155     0.077407   \n",
       "CPSF3L   ENSG00000127054      202  False                202     0.094815   \n",
       "ATAD3C   ENSG00000215915        9  False                  9     0.009259   \n",
       "C1orf86  ENSG00000162585      501  False                501     0.227778   \n",
       "RER1     ENSG00000157916      608  False                608     0.298148   \n",
       "...                  ...      ...    ...                ...          ...   \n",
       "ICOSLG   ENSG00000160223       34  False                 34     0.016667   \n",
       "SUMO3    ENSG00000184900      570  False                570     0.292963   \n",
       "SLC19A1  ENSG00000173638       31  False                 31     0.018519   \n",
       "S100B    ENSG00000160307       94  False                 94     0.076667   \n",
       "PRMT2    ENSG00000160310      588  False                588     0.275926   \n",
       "\n",
       "         pct_dropout_by_counts  total_counts  highly_variable     means  \\\n",
       "index                                                                     \n",
       "TNFRSF4              94.259259         209.0             True  0.277410   \n",
       "CPSF3L               92.518519         256.0             True  0.385194   \n",
       "ATAD3C               99.666667          25.0             True  0.038252   \n",
       "C1orf86              81.444444         615.0             True  0.678283   \n",
       "RER1                 77.481481         805.0             True  0.814813   \n",
       "...                        ...           ...              ...       ...   \n",
       "ICOSLG               98.740741          45.0             True  0.082016   \n",
       "SUMO3                78.888889         791.0             True  0.804815   \n",
       "SLC19A1              98.851852          50.0             True  0.058960   \n",
       "S100B                96.518519         207.0             True  0.286282   \n",
       "PRMT2                78.222222         745.0             True  0.816647   \n",
       "\n",
       "         dispersions  dispersions_norm          mean       std  \n",
       "index                                                           \n",
       "TNFRSF4     2.086050          0.665406 -9.596541e-10  1.000000  \n",
       "CPSF3L      4.506987          2.955005 -6.213524e-10  0.999605  \n",
       "ATAD3C      3.953486          4.352607 -9.898943e-12  0.553626  \n",
       "C1orf86     2.713522          0.543183 -5.648659e-11  1.000000  \n",
       "RER1        3.447533          1.582528 -3.389195e-11  1.000000  \n",
       "...              ...               ...           ...       ...  \n",
       "ICOSLG      2.585818          1.652185  1.165421e-09  0.985887  \n",
       "SUMO3       4.046776          2.431045 -1.157975e-09  1.000000  \n",
       "SLC19A1     3.234231          2.932458 -6.231287e-10  0.946179  \n",
       "S100B       3.042992          1.078783 -7.653932e-10  0.999970  \n",
       "PRMT2       2.774169          0.629058 -4.405954e-10  1.000000  \n",
       "\n",
       "[1838 rows x 13 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pbmc.var"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['AAACATACAACCAC-1', 'AAACATTGAGCTAC-1', 'AAACATTGATCAGC-1',\n",
       "       'AAACCGTGCTTCCG-1', 'AAACCGTGTATGCG-1', 'AAACGCACTGGTAC-1',\n",
       "       'AAACGCTGACCAGT-1', 'AAACGCTGGTTCTT-1', 'AAACGCTGTAGCCA-1',\n",
       "       'AAACGCTGTTTCTG-1',\n",
       "       ...\n",
       "       'TTTCAGTGTCACGA-1', 'TTTCAGTGTCTATC-1', 'TTTCAGTGTGCAGT-1',\n",
       "       'TTTCCAGAGGTGAG-1', 'TTTCGAACACCTGA-1', 'TTTCGAACTCTCAT-1',\n",
       "       'TTTCTACTGAGGCA-1', 'TTTCTACTTCCTCG-1', 'TTTGCATGAGAGGC-1',\n",
       "       'TTTGCATGCCTCAC-1'],\n",
       "      dtype='object', name='index', length=2638)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pbmc.obs_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['TNFRSF4', 'CPSF3L', 'ATAD3C', 'C1orf86', 'RER1', 'TNFRSF25', 'TNFRSF9',\n",
       "       'CTNNBIP1', 'SRM', 'UBIAD1',\n",
       "       ...\n",
       "       'DSCR3', 'BRWD1', 'BACE2', 'SIK1', 'C21orf33', 'ICOSLG', 'SUMO3',\n",
       "       'SLC19A1', 'S100B', 'PRMT2'],\n",
       "      dtype='object', name='index', length=1838)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pbmc.var_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "View of AnnData object with n_obs × n_vars = 5 × 5\n",
       "    obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden'\n",
       "    var: 'gene_ids', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'\n",
       "    uns: 'hvg', 'leiden', 'leiden_colors', 'neighbors', 'pca', 'rank_genes_groups', 'umap'\n",
       "    obsm: 'X_pca', 'X_umap'\n",
       "    varm: 'PCs'\n",
       "    obsp: 'connectivities', 'distances'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
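   ],
   "source": [
    "# Subset by label: select the first five observations and the first five variables by name (the result is a view of pbmc, not a copy)\n",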
    "pbmc[pbmc.obs_names[:5],pbmc.var_names[:5]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "View of AnnData object with n_obs × n_vars = 2 × 2\n",
       "    obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden'\n",
       "    var: 'gene_ids', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'\n",
       "    uns: 'hvg', 'leiden', 'leiden_colors', 'neighbors', 'pca', 'rank_genes_groups', 'umap'\n",
       "    obsm: 'X_pca', 'X_umap'\n",
       "    varm: 'PCs'\n",
       "    obsp: 'connectivities', 'distances'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Subset by integer position: rows 1-2 and columns 1-2 (zero-based indices); this also returns a view\n",
    "pbmc[[1,2],[1,2]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>n_genes</th>\n",
       "      <th>n_genes_by_counts</th>\n",
       "      <th>total_counts</th>\n",
       "      <th>total_counts_mt</th>\n",
       "      <th>pct_counts_mt</th>\n",
       "      <th>leiden</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>index</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAACATTGAGCTAC-1</th>\n",
       "      <td>1352</td>\n",
       "      <td>1352</td>\n",
       "      <td>4903.0</td>\n",
       "      <td>186.0</td>\n",
       "      <td>3.793596</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAACATTGATCAGC-1</th>\n",
       "      <td>1131</td>\n",
       "      <td>1129</td>\n",
       "      <td>3147.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>0.889736</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAACTTGAAAAACG-1</th>\n",
       "      <td>1116</td>\n",
       "      <td>1112</td>\n",
       "      <td>3914.0</td>\n",
       "      <td>103.0</td>\n",
       "      <td>2.631579</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAGAGACGCGAGA-1</th>\n",
       "      <td>1059</td>\n",
       "      <td>1058</td>\n",
       "      <td>3033.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>1.417738</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAGCAGATATCGG-1</th>\n",
       "      <td>1424</td>\n",
       "      <td>1422</td>\n",
       "      <td>4584.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>1.396161</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTAGCTGTACTCT-1</th>\n",
       "      <td>1567</td>\n",
       "      <td>1560</td>\n",
       "      <td>5671.0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>2.116029</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTATCCTGTTGTG-1</th>\n",
       "      <td>1156</td>\n",
       "      <td>1155</td>\n",
       "      <td>3679.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1.304702</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTCGAACACCTGA-1</th>\n",
       "      <td>1544</td>\n",
       "      <td>1539</td>\n",
       "      <td>4455.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>1.301908</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTCGAACTCTCAT-1</th>\n",
       "      <td>1155</td>\n",
       "      <td>1153</td>\n",
       "      <td>3459.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>2.110436</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTCTACTGAGGCA-1</th>\n",
       "      <td>1227</td>\n",
       "      <td>1224</td>\n",
       "      <td>3443.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.929422</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>526 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  n_genes  n_genes_by_counts  total_counts  total_counts_mt  \\\n",
       "index                                                                         \n",
       "AAACATTGAGCTAC-1     1352               1352        4903.0            186.0   \n",
       "AAACATTGATCAGC-1     1131               1129        3147.0             28.0   \n",
       "AAACTTGAAAAACG-1     1116               1112        3914.0            103.0   \n",
       "AAAGAGACGCGAGA-1     1059               1058        3033.0             43.0   \n",
       "AAAGCAGATATCGG-1     1424               1422        4584.0             64.0   \n",
       "...                   ...                ...           ...              ...   \n",
       "TTTAGCTGTACTCT-1     1567               1560        5671.0            120.0   \n",
       "TTTATCCTGTTGTG-1     1156               1155        3679.0             48.0   \n",
       "TTTCGAACACCTGA-1     1544               1539        4455.0             58.0   \n",
       "TTTCGAACTCTCAT-1     1155               1153        3459.0             73.0   \n",
       "TTTCTACTGAGGCA-1     1227               1224        3443.0             32.0   \n",
       "\n",
       "                  pct_counts_mt leiden  \n",
       "index                                   \n",
       "AAACATTGAGCTAC-1       3.793596      2  \n",
       "AAACATTGATCAGC-1       0.889736      0  \n",
       "AAACTTGAAAAACG-1       2.631579      2  \n",
       "AAAGAGACGCGAGA-1       1.417738      1  \n",
       "AAAGCAGATATCGG-1       1.396161      1  \n",
       "...                         ...    ...  \n",
       "TTTAGCTGTACTCT-1       2.116029      6  \n",
       "TTTATCCTGTTGTG-1       1.304702      1  \n",
       "TTTCGAACACCTGA-1       1.301908      6  \n",
       "TTTCGAACTCTCAT-1       2.110436      1  \n",
       "TTTCTACTGAGGCA-1       0.929422      2  \n",
       "\n",
       "[526 rows x 6 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
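   ],
   "source": [
    "# Subset with a boolean mask: keep only the observations whose obs.n_genes is greater than 1000 (all variables), then inspect their obs table\n",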
    "pbmc_sub = pbmc[pbmc.obs.n_genes > 1000,:]\n",
    "pbmc_sub.obs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>n_genes</th>\n",
       "      <th>n_genes_by_counts</th>\n",
       "      <th>total_counts</th>\n",
       "      <th>total_counts_mt</th>\n",
       "      <th>pct_counts_mt</th>\n",
       "      <th>leiden</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAACATTGAGCTAC-2</th>\n",
       "      <td>1352</td>\n",
       "      <td>1352</td>\n",
       "      <td>4903.0</td>\n",
       "      <td>186.0</td>\n",
       "      <td>3.793596</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAACATTGATCAGC-2</th>\n",
       "      <td>1131</td>\n",
       "      <td>1129</td>\n",
       "      <td>3147.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>0.889736</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAACTTGAAAAACG-2</th>\n",
       "      <td>1116</td>\n",
       "      <td>1112</td>\n",
       "      <td>3914.0</td>\n",
       "      <td>103.0</td>\n",
       "      <td>2.631579</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAGAGACGCGAGA-2</th>\n",
       "      <td>1059</td>\n",
       "      <td>1058</td>\n",
       "      <td>3033.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>1.417738</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAAGCAGATATCGG-2</th>\n",
       "      <td>1424</td>\n",
       "      <td>1422</td>\n",
       "      <td>4584.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>1.396161</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTAGCTGTACTCT-2</th>\n",
       "      <td>1567</td>\n",
       "      <td>1560</td>\n",
       "      <td>5671.0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>2.116029</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTATCCTGTTGTG-2</th>\n",
       "      <td>1156</td>\n",
       "      <td>1155</td>\n",
       "      <td>3679.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1.304702</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTCGAACACCTGA-2</th>\n",
       "      <td>1544</td>\n",
       "      <td>1539</td>\n",
       "      <td>4455.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>1.301908</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTCGAACTCTCAT-2</th>\n",
       "      <td>1155</td>\n",
       "      <td>1153</td>\n",
       "      <td>3459.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>2.110436</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTTCTACTGAGGCA-2</th>\n",
       "      <td>1227</td>\n",
       "      <td>1224</td>\n",
       "      <td>3443.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.929422</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>526 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  n_genes  n_genes_by_counts  total_counts  total_counts_mt  \\\n",
       "AAACATTGAGCTAC-2     1352               1352        4903.0            186.0   \n",
       "AAACATTGATCAGC-2     1131               1129        3147.0             28.0   \n",
       "AAACTTGAAAAACG-2     1116               1112        3914.0            103.0   \n",
       "AAAGAGACGCGAGA-2     1059               1058        3033.0             43.0   \n",
       "AAAGCAGATATCGG-2     1424               1422        4584.0             64.0   \n",
       "...                   ...                ...           ...              ...   \n",
       "TTTAGCTGTACTCT-2     1567               1560        5671.0            120.0   \n",
       "TTTATCCTGTTGTG-2     1156               1155        3679.0             48.0   \n",
       "TTTCGAACACCTGA-2     1544               1539        4455.0             58.0   \n",
       "TTTCGAACTCTCAT-2     1155               1153        3459.0             73.0   \n",
       "TTTCTACTGAGGCA-2     1227               1224        3443.0             32.0   \n",
       "\n",
       "                  pct_counts_mt leiden  \n",
       "AAACATTGAGCTAC-2       3.793596      2  \n",
       "AAACATTGATCAGC-2       0.889736      0  \n",
       "AAACTTGAAAAACG-2       2.631579      2  \n",
       "AAAGAGACGCGAGA-2       1.417738      1  \n",
       "AAAGCAGATATCGG-2       1.396161      1  \n",
       "...                         ...    ...  \n",
       "TTTAGCTGTACTCT-2       2.116029      6  \n",
       "TTTATCCTGTTGTG-2       1.304702      1  \n",
       "TTTCGAACACCTGA-2       1.301908      6  \n",
       "TTTCGAACTCTCAT-2       2.110436      1  \n",
       "TTTCTACTGAGGCA-2       0.929422      2  \n",
       "\n",
       "[526 rows x 6 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pbmc_sub.obs_names = [i.replace(\"-1\",\"-2\") for i in pbmc_sub.obs_names]\n",
    "pbmc_sub.obs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    " Concatenation: https://anndata.readthedocs.io/en/latest/generated/anndata.concat.html#anndata.concat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AnnData object with n_obs × n_vars = 3164 × 1838\n",
       "    obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'batch'\n",
       "    obsm: 'X_pca', 'X_umap'"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pbmc_concat = ad.concat([pbmc,pbmc_sub],keys=[\"pbmc\",\"pbmc_sub\"],label=\"batch\",join=\"inner\")\n",
    "pbmc_concat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AnnData object with n_obs × n_vars = 3164 × 1838\n",
       "    obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'batch'\n",
       "    var: 'gene_ids', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'\n",
       "    obsm: 'X_pca', 'X_umap'\n",
       "    varm: 'PCs'"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pbmc_concat_same = ad.concat([pbmc,pbmc_sub],keys=[\"pbmc\",\"pbmc_sub\"],label=\"batch\",join=\"inner\",merge=\"same\")\n",
    "pbmc_concat_same"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
